package com.ml.room.terrace.process;

import cn.hutool.core.collection.CollectionUtil;
import com.alibaba.fastjson.JSON;
import com.ml.room.common.dto.RequestBuildingRecordDto;
import com.ml.room.common.dto.RequestProjectItemDto;
import com.ml.room.dao.IBuildingProjectDao;
import com.ml.room.dao.IBuildingProjectItemDao;
import com.ml.room.dao.IBuildingRecordDao;
import com.ml.room.repository.entity.BuildingProjectItem;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;
import us.codecraft.webmagic.utils.HttpConstant;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * WebMagic {@link PageProcessor} for the building project detail page ("楼房详情").
 *
 * <p>For every data row of the detail table it builds a {@link BuildingProjectItem}, skips rows
 * that {@link IBuildingProjectItemDao#isHaveThisBuildingProjectItem} reports as already stored,
 * and publishes the remaining items on the page under the result fields {@code projectItems},
 * {@code isFinish} and {@code recordId}.
 *
 * <p>NOTE(review): {@link #recordId} is mutable state on a Spring component; if the bean is shared
 * between concurrent crawls the record id of one crawl can leak into another — confirm how callers
 * use it.
 *
 * @author IDai
 * @since 2021-09-01
 */
@Component
public class BuildingProjectItemPageProcess implements PageProcessor{
    @Autowired
    private IBuildingProjectDao buildingProjectDao;
    @Autowired
    private IBuildingProjectItemDao buildingProjectItemDao;

    /** Logger for this processor. */
    private static final Logger logger = LoggerFactory.getLogger(BuildingProjectItemPageProcess.class);

    /** Index of the nested table (within {@code table#top}) that holds the room rows. */
    private static final int DETAIL_TABLE_INDEX = 3;

    /** Rows that are always present in the detail table: the header row and the paging row. */
    private static final int HEADER_AND_PAGER_ROWS = 2;

    /** Number of {@code td} cells a room data row must have. */
    private static final int DATA_ROW_CELL_COUNT = 11;

    /** {@code bgcolor} attribute value that marks a room data row. */
    private static final String DATA_ROW_BGCOLOR = "#FFFFFF";

    // Crawl settings: retry count, sleep time between requests and request timeout
    // (see the WebMagic Site documentation for the full list of options).
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000);

    // Primary key of the filing record the crawled rows belong to.
    public String recordId;


    public String getRecordId() {
        return recordId;
    }

    public void setRecordId(String recordId) {
        this.recordId = recordId;
    }

    /**
     * Extracts the not-yet-stored room rows from the detail page and publishes them as result
     * fields.
     *
     * <p>{@code isFinish} is published as {@code true} whenever no new item was extracted, which
     * covers a missing detail table, a table with only header/paging rows, and a page whose rows
     * are all already stored.
     *
     * @param page the downloaded detail page
     */
    @Override
    public void process(Page page) {
        // Snapshot the record id so every row of this page is tagged with the same value.
        final String currentRecordId = recordId;
        Set<BuildingProjectItem> projectItems = new LinkedHashSet<>();

        List<Selectable> tables = page.getHtml().css("table#top tbody tr table").nodes();
        if (tables.size() <= DETAIL_TABLE_INDEX) {
            // Unexpected layout (e.g. an error page): report "finished" instead of failing with
            // an IndexOutOfBoundsException and publishing nothing.
            logger.warn("【合肥新房】 - 楼房详情 - 未找到楼房详情表格，url：{}", page.getRequest().getUrl());
            publishResult(page, projectItems, currentRecordId);
            return;
        }

        List<Selectable> trContents = tables.get(DETAIL_TABLE_INDEX).css("tbody tr").nodes();
        logger.info("【合肥新房】 - 楼房详情 - 数据抽取，入参page：{}", JSON.toJSONString(trContents));

        // More than the header and paging rows means there are room rows in between.
        if (trContents.size() > HEADER_AND_PAGER_ROWS) {
            String projectName = page.getHtml().css("table#top tr span#txtTitle font","text").toString();
            String projectId = resolveProjectId(projectName);
            for (Selectable trSelectable : trContents) {
                List<Selectable> tdContents = trSelectable.css("td").nodes();
                if (!isDataRow(trSelectable, tdContents)) {
                    continue;
                }
                BuildingProjectItem projectItem = new BuildingProjectItem();
                projectItem.setRecordId(currentRecordId);
                projectItem.setProjectId(projectId);
                projectItem.setBuildingNo(cellText(tdContents, 0));
                projectItem.setRoomNumber(cellText(tdContents, 1));
                int result = buildingProjectItemDao.isHaveThisBuildingProjectItem(
                        currentRecordId, projectItem.getBuildingNo(), projectItem.getRoomNumber());
                if (result != 0) {
                    // Already stored: skip the row.
                    continue;
                }
                projectItem.setHouseType(cellText(tdContents, 2));
                projectItem.setRoomArea(cellText(tdContents, 3));
                projectItem.setCommonArea(cellText(tdContents, 4));
                projectItem.setCaseArea(cellText(tdContents, 5));
                projectItem.setRecordPrice(cellText(tdContents, 6));
                projectItem.setRecordAmount(cellText(tdContents, 7));
                projectItem.setBuildingProperty(cellText(tdContents, 8));
                projectItem.setDecorateState(cellText(tdContents, 9));
                projectItem.setRemark(cellText(tdContents, 10));
                projectItems.add(projectItem);
            }
        }
        publishResult(page, projectItems, currentRecordId);
    }

    /**
     * Looks up the project id by the title shown on the page; if that finds nothing, retries with
     * the text after the first {@code "-"}, because the detail page may prefix the project name
     * with the developer name.
     *
     * @param projectName project title as rendered on the page, may be {@code null}
     * @return the project id, or the (empty/null) result of the lookup when none matches
     */
    private String resolveProjectId(String projectName) {
        String projectId = buildingProjectDao.findIdByProjectName(projectName);
        if (StringUtils.isNotEmpty(projectId)) {
            return projectId;
        }
        String nameWithoutPrefix = StringUtils.substringAfter(projectName, "-");
        if (StringUtils.isEmpty(nameWithoutPrefix)) {
            // No "-" in the title (or no title at all): nothing meaningful to retry with.
            return projectId;
        }
        return buildingProjectDao.findIdByProjectName(nameWithoutPrefix);
    }

    /** Returns whether the row is a room data row: expected background colour and cell count. */
    private static boolean isDataRow(Selectable row, List<Selectable> cells) {
        return StringUtils.equals(row.css("td","bgcolor").toString(), DATA_ROW_BGCOLOR)
                && cells.size() == DATA_ROW_CELL_COUNT;
    }

    /** Returns the text of the {@code div} inside the cell at {@code index}. */
    private static String cellText(List<Selectable> cells, int index) {
        return cells.get(index).css("div","text").toString();
    }

    /** Publishes the extraction result fields on the page and logs the extracted items. */
    private static void publishResult(Page page, Set<BuildingProjectItem> projectItems, String recordId) {
        page.putField("projectItems", projectItems);
        // Nothing new extracted means the crawl of this record is considered finished.
        page.putField("isFinish", projectItems.isEmpty());
        page.putField("recordId", recordId);
        logger.info("【合肥新房】 - 楼房详情 - 数据抽取，出参page：{}", JSON.toJSONString(projectItems));
    }

    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Manual smoke run against a fixed detail page.
     *
     * <p>The processor is created with {@code new}, so the {@code @Autowired} DAOs are not
     * injected; a page that reaches the DAO lookups fails with a {@link NullPointerException}.
     * NOTE(review): WebMagic presumably catches and logs that per request — confirm.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Request request = new Request("http://220.178.124.94:8010/fangjia/ws/Detail2.aspx?Id=7956");
        RequestProjectItemDto projectItemDto = new RequestProjectItemDto();
        projectItemDto.set__EVENTARGUMENT("5");
        request.setRequestBody(HttpRequestBody.form(JSON.parseObject(JSON.toJSONString(projectItemDto), Map.class),"utf-8"));
        request.setMethod(HttpConstant.Method.POST);
        BuildingProjectItemPageProcess buildingProjectItemPageProcess = new BuildingProjectItemPageProcess();
        buildingProjectItemPageProcess.setRecordId("1430783405829779457");
        try {
            // Create the spider for this processor, queue the POST request,
            // crawl with 5 threads and block until the crawl ends.
            Spider.create(buildingProjectItemPageProcess)
                    .addRequest(request)
                    .thread(5)
                    .run();
        }catch (Exception e){
            logger.error("【合肥新房】 - 楼房详情 - 爬虫执行异常", e);
        }

    }
}
